This is the code for the statistical analysis for “Vowel Acoustics as Predictors of Speech Intelligibility in Dysarthria.”
devtools::install_github("AustinRThompson/mslTools")
Downloading GitHub repo AustinRThompson/mslTools@HEAD
These packages have more recent versions available.
It is recommended to update all of them.
Which would you like to update?
1: All
2: CRAN packages only
3: None
4: openssl (2.0.0 -> 2.0.1) [CRAN]
1
openssl (2.0.0 -> 2.0.1) [CRAN]
Installing 1 packages: openssl
There is a binary version available but the source version is later:
yes
installing the source package ‘openssl’
trying URL 'https://cran.rstudio.com/src/contrib/openssl_2.0.1.tar.gz'
Content type 'application/x-gzip' length 1204569 bytes (1.1 MB)
==================================================
downloaded 1.1 MB
* installing *source* package ‘openssl’ ...
** package ‘openssl’ successfully unpacked and MD5 sums checked
** using staged installation
Homebrew 2.4.5
Homebrew/homebrew-core (git revision d2eb63; last commit 2020-07-11)
Using PKG_CFLAGS=-I/usr/local/opt/openssl/include -I/usr/local/opt/openssl@1.1/include
--------------------------- [ANTICONF] --------------------------------
Configuration failed because openssl was not found. Try installing:
* deb: libssl-dev (Debian, Ubuntu, etc)
* rpm: openssl-devel (Fedora, CentOS, RHEL)
* csw: libssl_dev (Solaris)
* brew: openssl (Mac OSX)
If openssl is already installed, check that 'pkg-config' is in your
PATH and PKG_CONFIG_PATH contains a openssl.pc file. If pkg-config
is unavailable you can set INCLUDE_DIR and LIB_DIR manually via:
R CMD INSTALL --configure-vars='INCLUDE_DIR=... LIB_DIR=...'
-------------------------- [ERROR MESSAGE] ---------------------------
tools/version.c:1:10: fatal error: 'openssl/opensslv.h' file not found
#include <openssl/opensslv.h>
^~~~~~~~~~~~~~~~~~~~
1 error generated.
--------------------------------------------------------------------
ERROR: configuration failed for package ‘openssl’
* removing ‘/Library/Frameworks/R.framework/Versions/4.0/Resources/library/openssl’
* restoring previous ‘/Library/Frameworks/R.framework/Versions/4.0/Resources/library/openssl’
installation of package ‘openssl’ had non-zero exit status
The downloaded source packages are in
‘/private/var/folders/2s/c8b_6s1s5tgcqqt271kpjw4r0000gn/T/RtmpPx425O/downloaded_packages’
checking for file ‘/private/var/folders/2s/c8b_6s1s5tgcqqt271kpjw4r0000gn/T/RtmpPx425O/remotes58b052e11089/AustinRThompson-mslTools-58d506d/DESCRIPTION’ ...
✔ checking for file ‘/private/var/folders/2s/c8b_6s1s5tgcqqt271kpjw4r0000gn/T/RtmpPx425O/remotes58b052e11089/AustinRThompson-mslTools-58d506d/DESCRIPTION’ (2.3s)
─ preparing ‘mslTools’:
checking DESCRIPTION meta-information ...
✔ checking DESCRIPTION meta-information
─ checking for LF line-endings in source and make files and shell scripts (585ms)
─ checking for empty or unneeded directories
─ building ‘mslTools_0.0.0.9000.tar.gz’
* installing *source* package ‘mslTools’ ...
** using staged installation
** R
** byte-compile and prepare package for lazy loading
Warning messages:
1: package ‘tidyr’ was built under R version 4.0.5
2: package ‘readr’ was built under R version 4.0.5
** help
No man pages found in package ‘mslTools’
*** installing help indices
*** copying figures
** building package indices
** testing if installed package can be loaded from temporary location
Warning: package ‘tidyr’ was built under R version 4.0.5
Warning: package ‘readr’ was built under R version 4.0.5
** testing if installed package can be loaded from final location
Warning: package ‘tidyr’ was built under R version 4.0.5
Warning: package ‘readr’ was built under R version 4.0.5
** testing if installed package keeps a record of temporary installation path
* DONE (mslTools)
Reliability <- rio::import("Prepped Data/Reliability Data.csv")
AcousticData <- rio::import("Prepped Data/AcousticMeasures.csv") %>%
dplyr::mutate(intDiff = VAS - transAcc)
AcousticData <- AcousticData %>%
dplyr::filter(!grepl("_rel", Speaker)) %>%
dplyr::select(c(Speaker, Sex, Etiology, vowel_ED_b, VSA_b, autoVSA,
Hull_b,Hull_bVSD_25, Hull_bVSD_50, Hull_bVSD_75,
VAS, transAcc)) %>%
dplyr::mutate(Etiology = as.factor(Etiology),
Sex = as.factor(Sex),
Speaker = as.factor(Speaker))
Listeners <- rio::import("Prepped Data/Listener_Demographics.csv") %>%
dplyr::select(!c(StartDate:proloficID, Q2.4_6_TEXT, Q3.2_8_TEXT, AudioCheck:EP3))
Listeners$race[Listeners$Q3.3_7_TEXT == "Native American/ African amercing"] <- "Biracial or Multiracial"
Two raters (the first two authors) completed vowel segmentation for the speakers. To calculate inter-rater reliability, 20% of the speakers were segmented again by the other rater. Two-way intraclass coefficients were computed for the extracted F1 and F2 from the temporal midpoint of the vowel segments. Since only one set of ratings will be used in the data analysis, we focus on the single ICC results and interpretation. However, we also report the average ICC values to be comprehensive.
## Creating new data frames to calculate ICC for extracted F1 and F2 values
F1_Rel <- Reliability %>%
dplyr::select(c(F1, F1_rel))
F2_Rel <- Reliability %>%
dplyr::select(c(F2, F2_rel))
## Single ICC for F1
Single_F1 <- irr::icc(F1_Rel, model = "twoway", type = "agreement", unit = "single")
## Average ICC for F1
Average_F1 <- irr::icc(F1_Rel, model = "twoway", type = "agreement", unit = "average")
## Single ICC for F2
Single_F2 <- irr::icc(F2_Rel, model = "twoway", type = "agreement", unit = "single")
## Average ICC for F2
Average_F2 <- irr::icc(F2_Rel, model = "twoway", type = "agreement", unit = "average")
## Inter-rater reliability results and interpretation
print(paste("Single ICC for F1 is ", round(Single_F1$value, digits = 3), ". ",
"The 95% CI is [", round(Single_F1$lbound, digits = 3), " - ", round(Single_F1$ubound, digits = 3), "].", sep = ""))
[1] "Single ICC for F1 is 0.866. The 95% CI is [0.837 - 0.89]."
print(paste("Single ICC for F2 is ", round(Single_F2$value, digits = 3), ". ",
"The 95% CI is [", round(Single_F2$lbound, digits = 3), " - ", round(Single_F2$ubound, digits = 3), "].", sep = ""))
[1] "Single ICC for F2 is 0.931. The 95% CI is [0.916 - 0.944]."
print(paste("Average ICC for F1 is ", round(Average_F1$value, digits = 3), ". ",
"The 95% CI is [", round(Average_F1$lbound, digits = 3), " - ", round(Average_F1$ubound, digits = 3), "].", sep = ""))
[1] "Average ICC for F1 is 0.928. The 95% CI is [0.911 - 0.942]."
print(paste("Average ICC for F2 is ", round(Average_F2$value, digits = 3), ". ",
"The 95% CI is [", round(Average_F2$lbound, digits = 3), " - ", round(Average_F2$ubound, digits = 3), "].", sep = ""))
[1] "Average ICC for F2 is 0.964. The 95% CI is [0.956 - 0.971]."
print("Thus, interrater reliability for the extracted F1 and F2 values from the vowel segments was good to excellent.")
[1] "Thus, interrater reliability for the extracted F1 and F2 values from the vowel segments was good to excellent."
## Removing extra data frames from environment
rm(F1_Rel, F2_Rel, Reliability, Single_F1, Single_F2, Average_F1, Average_F2)
CorrMatrix <- AcousticData %>%
dplyr::select(VSA_b, vowel_ED_b, Hull_b, Hull_bVSD_25, Hull_bVSD_75, VAS, transAcc) %>%
as.matrix() %>%
Hmisc::rcorr()
CorrMatrixP <- CorrMatrix$P < .05
CorrMatrix <- CorrMatrix$r
stats::cor.test(AcousticData$VSA_b, AcousticData$vowel_ED_b, method = "pearson")
Pearson's product-moment correlation
data: AcousticData$VSA_b and AcousticData$vowel_ED_b
t = 6.5285, df = 38, p-value = 1.076e-07
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.5372669 0.8468014
sample estimates:
cor
0.7270881
stats::cor.test(AcousticData$Hull_b, AcousticData$Hull_bVSD_25, method = "pearson")
Pearson's product-moment correlation
data: AcousticData$Hull_b and AcousticData$Hull_bVSD_25
t = 9.4906, df = 38, p-value = 1.43e-11
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.7135136 0.9119080
sample estimates:
cor
0.838625
write.csv(CorrMatrix, file = "Tables/Correlation Matrix.csv")
rm(CorrMatrix)
# Specifying Model 1
OT_Model1 <- lm(transAcc ~ Hull_bVSD_25, data = AcousticData)
## Model 1 Assumptions
performance::check_model(OT_Model1)
## Model 1 Summary
summary(OT_Model1)
Call:
lm(formula = transAcc ~ Hull_bVSD_25, data = AcousticData)
Residuals:
Min 1Q Median 3Q Max
-47.896 -14.090 5.996 17.470 36.105
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 48.2973 9.7372 4.960 1.5e-05 ***
Hull_bVSD_25 0.6417 0.5663 1.133 0.264
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 23.83 on 38 degrees of freedom
Multiple R-squared: 0.03268, Adjusted R-squared: 0.007224
F-statistic: 1.284 on 1 and 38 DF, p-value: 0.2643
## Specifying Model 2
OT_Model2 <- lm(transAcc ~ Hull_bVSD_25 + Hull_bVSD_75, data = AcousticData)
## Model 2 Assumption Check
performance::check_model(OT_Model2)
## Model 2 Summary
summary(OT_Model2)
Call:
lm(formula = transAcc ~ Hull_bVSD_25 + Hull_bVSD_75, data = AcousticData)
Residuals:
Min 1Q Median 3Q Max
-48.854 -13.962 5.567 16.758 36.257
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 47.3374 10.3316 4.582 5.09e-05 ***
Hull_bVSD_25 0.8045 0.7781 1.034 0.308
Hull_bVSD_75 -0.7188 2.3225 -0.309 0.759
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 24.11 on 37 degrees of freedom
Multiple R-squared: 0.03518, Adjusted R-squared: -0.01698
F-statistic: 0.6745 on 2 and 37 DF, p-value: 0.5156
## Model 1 and Model 2 Comparison
anova(OT_Model1, OT_Model2)
Analysis of Variance Table
Model 1: transAcc ~ Hull_bVSD_25
Model 2: transAcc ~ Hull_bVSD_25 + Hull_bVSD_75
Res.Df RSS Df Sum of Sq F Pr(>F)
1 38 21570
2 37 21514 1 55.696 0.0958 0.7587
## Specifying Model 3
OT_Model3a <- lm(transAcc ~ Hull_bVSD_25 + Hull_bVSD_75 + Hull_b, data = AcousticData)
## Model 3 Assumption Check
performance::check_model(OT_Model3a)
performance::check_collinearity(OT_Model3a)
# Check for Multicollinearity
Low Correlation
Term VIF Increased SE Tolerance
Hull_bVSD_75 2.00 1.41 0.50
Hull_b 3.65 1.91 0.27
Moderate Correlation
Term VIF Increased SE Tolerance
Hull_bVSD_25 5.33 2.31 0.19
## Model 3 Summary
summary(OT_Model3a)
Call:
lm(formula = transAcc ~ Hull_bVSD_25 + Hull_bVSD_75 + Hull_b,
data = AcousticData)
Residuals:
Min 1Q Median 3Q Max
-56.706 -13.157 7.018 17.957 29.990
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 31.0806 14.4916 2.145 0.0388 *
Hull_bVSD_25 -0.8439 1.2984 -0.650 0.5199
Hull_bVSD_75 0.3159 2.3714 0.133 0.8948
Hull_b 1.2941 0.8247 1.569 0.1253
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 23.65 on 36 degrees of freedom
Multiple R-squared: 0.09695, Adjusted R-squared: 0.0217
F-statistic: 1.288 on 3 and 36 DF, p-value: 0.2932
## Model 2 and Model 3 Comparison
anova(OT_Model2, OT_Model3a)
Analysis of Variance Table
Model 1: transAcc ~ Hull_bVSD_25 + Hull_bVSD_75
Model 2: transAcc ~ Hull_bVSD_25 + Hull_bVSD_75 + Hull_b
Res.Df RSS Df Sum of Sq F Pr(>F)
1 37 21514
2 36 20137 1 1377.5 2.4626 0.1253
## Specifying Model 3
OT_Model3b <- lm(transAcc ~ Hull_bVSD_75 + Hull_b, data = AcousticData)
## Model 3 Assumption Check
performance::check_model(OT_Model3b)
performance::check_collinearity(OT_Model3b)
# Check for Multicollinearity
Low Correlation
Term VIF Increased SE Tolerance
Hull_bVSD_75 1.26 1.12 0.79
Hull_b 1.26 1.12 0.79
## Model 3 Summary
summary(OT_Model3b)
Call:
lm(formula = transAcc ~ Hull_bVSD_75 + Hull_b, data = AcousticData)
Residuals:
Min 1Q Median 3Q Max
-54.371 -12.860 5.038 17.725 31.609
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 33.2337 13.9973 2.374 0.0229 *
Hull_bVSD_75 -0.6193 1.8702 -0.331 0.7424
Hull_b 0.8605 0.4809 1.789 0.0818 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 23.47 on 37 degrees of freedom
Multiple R-squared: 0.08635, Adjusted R-squared: 0.03697
F-statistic: 1.749 on 2 and 37 DF, p-value: 0.1881
## Specifying Model 4
OT_Model4 <- lm(transAcc ~ Hull_bVSD_75 + Hull_b + VSA_b, data = AcousticData)
## Model 4 Assumption Check
performance::check_model(OT_Model4)
performance::check_collinearity(OT_Model4)
# Check for Multicollinearity
Low Correlation
Term VIF Increased SE Tolerance
Hull_bVSD_75 1.26 1.12 0.79
Hull_b 1.68 1.30 0.60
VSA_b 1.45 1.20 0.69
## Model 4 Summary
summary(OT_Model4)
Call:
lm(formula = transAcc ~ Hull_bVSD_75 + Hull_b + VSA_b, data = AcousticData)
Residuals:
Min 1Q Median 3Q Max
-47.705 -12.573 3.108 14.504 35.215
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 30.7726 12.7686 2.410 0.02119 *
Hull_bVSD_75 -0.8216 1.7037 -0.482 0.63256
Hull_b 0.1202 0.5049 0.238 0.81322
VSA_b 5.8426 1.9859 2.942 0.00567 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 21.36 on 36 degrees of freedom
Multiple R-squared: 0.2634, Adjusted R-squared: 0.2021
F-statistic: 4.292 on 3 and 36 DF, p-value: 0.01092
## Model 3 and Model 4 Comparison
anova(OT_Model3b, OT_Model4)
Analysis of Variance Table
Model 1: transAcc ~ Hull_bVSD_75 + Hull_b
Model 2: transAcc ~ Hull_bVSD_75 + Hull_b + VSA_b
Res.Df RSS Df Sum of Sq F Pr(>F)
1 37 20373
2 36 16424 1 3948.9 8.6555 0.005673 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
## Specifying Model 5
OT_Model5 <- lm(transAcc ~ Hull_bVSD_75 + VSA_b + vowel_ED_b, data = AcousticData)
## Model 4 Assumption Check
performance::check_model(OT_Model5)
performance::check_collinearity(OT_Model5)
# Check for Multicollinearity
Low Correlation
Term VIF Increased SE Tolerance
Hull_bVSD_75 1.10 1.05 0.91
VSA_b 2.30 1.52 0.43
vowel_ED_b 2.15 1.47 0.46
## Model 4 Summary
summary(OT_Model5)
Call:
lm(formula = transAcc ~ Hull_bVSD_75 + VSA_b + vowel_ED_b, data = AcousticData)
Residuals:
Min 1Q Median 3Q Max
-46.501 -11.965 2.561 14.356 33.849
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 24.4273 20.1254 1.214 0.2327
Hull_bVSD_75 -0.5814 1.5872 -0.366 0.7163
VSA_b 5.2219 2.4990 2.090 0.0438 *
vowel_ED_b 6.0028 12.7240 0.472 0.6399
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 21.31 on 36 degrees of freedom
Multiple R-squared: 0.2668, Adjusted R-squared: 0.2057
F-statistic: 4.367 on 3 and 36 DF, p-value: 0.0101
## Model 3 and Model 4 Comparison
anova(OT_Model4, OT_Model5)
Analysis of Variance Table
Model 1: transAcc ~ Hull_bVSD_75 + Hull_b + VSA_b
Model 2: transAcc ~ Hull_bVSD_75 + VSA_b + vowel_ED_b
Res.Df RSS Df Sum of Sq F Pr(>F)
1 36 16424
2 36 16349 0 75.229
## Specifying Final Model
OT_Model_final <- lm(transAcc ~ VSA_b, data = AcousticData)
## Final Model Assumption Check
performance::check_model(OT_Model_final)
## Final Model Summary
summary(OT_Model_final)
Call:
lm(formula = transAcc ~ VSA_b, data = AcousticData)
Residuals:
Min 1Q Median 3Q Max
-46.72 -12.69 2.97 14.37 35.39
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 32.508 7.857 4.138 0.000187 ***
VSA_b 5.872 1.613 3.641 0.000807 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 20.86 on 38 degrees of freedom
Multiple R-squared: 0.2586, Adjusted R-squared: 0.2391
F-statistic: 13.25 on 1 and 38 DF, p-value: 0.0008068
confint(OT_Model_final)
2.5 % 97.5 %
(Intercept) 16.602765 48.413532
VSA_b 2.606927 9.137097
# Specifying Model 1
VAS_Model1 <- lm(VAS ~ Hull_bVSD_25, data = AcousticData)
## Model 1 Assumptions
performance::check_model(VAS_Model1)
## Model 1 Summary
summary(VAS_Model1)
Call:
lm(formula = VAS ~ Hull_bVSD_25, data = AcousticData)
Residuals:
Min 1Q Median 3Q Max
-47.625 -16.684 8.462 19.440 37.352
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 42.6328 10.7512 3.965 0.000313 ***
Hull_bVSD_25 0.5877 0.6253 0.940 0.353236
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 26.31 on 38 degrees of freedom
Multiple R-squared: 0.02272, Adjusted R-squared: -0.003001
F-statistic: 0.8833 on 1 and 38 DF, p-value: 0.3532
## Specifying Model 2
VAS_Model2 <- lm(VAS ~ Hull_bVSD_25 + Hull_bVSD_75, data = AcousticData)
## Model 2 Assumption Check
performance::check_model(VAS_Model2)
## Model 2 Summary
summary(VAS_Model2)
Call:
lm(formula = VAS ~ Hull_bVSD_25 + Hull_bVSD_75, data = AcousticData)
Residuals:
Min 1Q Median 3Q Max
-47.850 -16.576 8.382 19.448 37.237
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 42.3384 11.4211 3.707 0.000684 ***
Hull_bVSD_25 0.6376 0.8602 0.741 0.463195
Hull_bVSD_75 -0.2204 2.5674 -0.086 0.932036
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 26.66 on 37 degrees of freedom
Multiple R-squared: 0.02291, Adjusted R-squared: -0.0299
F-statistic: 0.4338 on 2 and 37 DF, p-value: 0.6513
## Model 1 and Model 2 Comparison
anova(VAS_Model1, VAS_Model2)
Analysis of Variance Table
Model 1: VAS ~ Hull_bVSD_25
Model 2: VAS ~ Hull_bVSD_25 + Hull_bVSD_75
Res.Df RSS Df Sum of Sq F Pr(>F)
1 38 26296
2 37 26291 1 5.239 0.0074 0.932
## Specifying Model 3
VAS_Model3a <- lm(VAS ~ Hull_bVSD_25 + Hull_bVSD_75 + Hull_b, data = AcousticData)
## Model 3 Assumption Check
performance::check_model(VAS_Model3a)
performance::check_collinearity(VAS_Model3a)
# Check for Multicollinearity
Low Correlation
Term VIF Increased SE Tolerance
Hull_bVSD_75 2.00 1.41 0.50
Hull_b 3.65 1.91 0.27
Moderate Correlation
Term VIF Increased SE Tolerance
Hull_bVSD_25 5.33 2.31 0.19
## Model 3 Summary
summary(VAS_Model3a)
Call:
lm(formula = VAS ~ Hull_bVSD_25 + Hull_bVSD_75 + Hull_b, data = AcousticData)
Residuals:
Min 1Q Median 3Q Max
-56.444 -18.989 6.121 18.036 32.281
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 25.0400 16.0599 1.559 0.128
Hull_bVSD_25 -1.1164 1.4389 -0.776 0.443
Hull_bVSD_75 0.8805 2.6280 0.335 0.740
Hull_b 1.3770 0.9139 1.507 0.141
Residual standard error: 26.21 on 36 degrees of freedom
Multiple R-squared: 0.08087, Adjusted R-squared: 0.00428
F-statistic: 1.056 on 3 and 36 DF, p-value: 0.3799
## Model 2 and Model 3 Comparison
anova(VAS_Model2, VAS_Model3a)
Analysis of Variance Table
Model 1: VAS ~ Hull_bVSD_25 + Hull_bVSD_75
Model 2: VAS ~ Hull_bVSD_25 + Hull_bVSD_75 + Hull_b
Res.Df RSS Df Sum of Sq F Pr(>F)
1 37 26291
2 36 24731 1 1559.6 2.2702 0.1406
This model removes VSD25 because of it’s high VIF value > 5.
## Specifying Model 3b
VAS_Model3b <- lm(VAS ~ Hull_bVSD_75 + Hull_b, data = AcousticData)
## Model 3 Assumption Check
performance::check_model(VAS_Model3b)
performance::check_collinearity(VAS_Model3b)
# Check for Multicollinearity
Low Correlation
Term VIF Increased SE Tolerance
Hull_bVSD_75 1.26 1.12 0.79
Hull_b 1.26 1.12 0.79
## Model 3 Summary
summary(VAS_Model3b)
Call:
lm(formula = VAS ~ Hull_bVSD_75 + Hull_b, data = AcousticData)
Residuals:
Min 1Q Median 3Q Max
-53.356 -19.394 8.036 21.298 33.412
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 27.8884 15.5503 1.793 0.0811 .
Hull_bVSD_75 -0.3567 2.0777 -0.172 0.8646
Hull_b 0.8034 0.5343 1.504 0.1412
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 26.07 on 37 degrees of freedom
Multiple R-squared: 0.0655, Adjusted R-squared: 0.01499
F-statistic: 1.297 on 2 and 37 DF, p-value: 0.2855
## Specifying Model 4
VAS_Model4 <- lm(VAS ~ Hull_bVSD_75 + Hull_b + VSA_b, data = AcousticData)
## Model 4 Assumption Check
performance::check_model(VAS_Model4)
performance::check_collinearity(VAS_Model4)
# Check for Multicollinearity
Low Correlation
Term VIF Increased SE Tolerance
Hull_bVSD_75 1.26 1.12 0.79
Hull_b 1.68 1.30 0.60
VSA_b 1.45 1.20 0.69
## Model 4 Summary
summary(VAS_Model4)
Call:
lm(formula = VAS ~ Hull_bVSD_75 + Hull_b + VSA_b, data = AcousticData)
Residuals:
Min 1Q Median 3Q Max
-45.56 -15.17 6.40 16.64 42.62
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.522e+01 1.426e+01 1.768 0.08553 .
Hull_bVSD_75 -5.762e-01 1.903e+00 -0.303 0.76382
Hull_b 5.056e-05 5.640e-01 0.000 0.99993
VSA_b 6.340e+00 2.218e+00 2.858 0.00705 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 23.86 on 36 degrees of freedom
Multiple R-squared: 0.2383, Adjusted R-squared: 0.1748
F-statistic: 3.754 on 3 and 36 DF, p-value: 0.01916
## Model 3 and Model 4 Comparison
anova(VAS_Model3b, VAS_Model4)
Analysis of Variance Table
Model 1: VAS ~ Hull_bVSD_75 + Hull_b
Model 2: VAS ~ Hull_bVSD_75 + Hull_b + VSA_b
Res.Df RSS Df Sum of Sq F Pr(>F)
1 37 25145
2 36 20495 1 4649.8 8.1674 0.007046 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
## Specifying Model 5
VAS_Model5 <- lm(VAS ~ Hull_bVSD_75 + Hull_b + VSA_b + vowel_ED_b, data = AcousticData)
## Model 5 Assumption Check
performance::check_model(VAS_Model5)
## Model 5 Summary
summary(VAS_Model5)
Call:
lm(formula = VAS ~ Hull_bVSD_75 + Hull_b + VSA_b + vowel_ED_b,
data = AcousticData)
Residuals:
Min 1Q Median 3Q Max
-44.407 -13.917 7.397 16.408 41.322
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 16.61280 23.81590 0.698 0.490
Hull_bVSD_75 -0.40875 1.95955 -0.209 0.836
Hull_b -0.05465 0.58294 -0.094 0.926
VSA_b 5.49348 2.91674 1.883 0.068 .
vowel_ED_b 6.68540 14.72389 0.454 0.653
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 24.13 on 35 degrees of freedom
Multiple R-squared: 0.2428, Adjusted R-squared: 0.1562
F-statistic: 2.805 on 4 and 35 DF, p-value: 0.04041
## Model 4 and Model 5 Comparison
anova(VAS_Model4, VAS_Model5)
Analysis of Variance Table
Model 1: VAS ~ Hull_bVSD_75 + Hull_b + VSA_b
Model 2: VAS ~ Hull_bVSD_75 + Hull_b + VSA_b + vowel_ED_b
Res.Df RSS Df Sum of Sq F Pr(>F)
1 36 20495
2 35 20375 1 120.02 0.2062 0.6526
## Specifying Final Model
VAS_Model_final <- lm(VAS ~ VSA_b, data = AcousticData)
## Final Model Assumption Check
performance::check_model(VAS_Model_final)
## Final Model Summary
summary(VAS_Model_final)
Call:
lm(formula = VAS ~ VSA_b, data = AcousticData)
Residuals:
Min 1Q Median 3Q Max
-44.956 -15.943 6.754 17.153 43.062
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 24.703 8.761 2.820 0.00760 **
VSA_b 6.163 1.798 3.427 0.00148 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 23.26 on 38 degrees of freedom
Multiple R-squared: 0.2361, Adjusted R-squared: 0.216
F-statistic: 11.74 on 1 and 38 DF, p-value: 0.001482
confint(VAS_Model_final)
2.5 % 97.5 %
(Intercept) 6.966936 42.438084
VSA_b 2.521887 9.803467
Model 1
# Specify Model
OT_VAS_model <- lm(transAcc ~ VAS*Etiology + VAS*Sex, data = AcousticData)
# Assumption Check
performance::check_model(OT_VAS_model)
# Model Results
summary(OT_VAS_model)
# Specify Final Model
OT_VAS_final <- lm(transAcc ~ VAS, data = AcousticData)
confint(OT_VAS_final)
# Model Results
summary(OT_VAS_final)
Looking at corner dispersion as the sole predictor.
# Specify Final Model
OT_cornDisp <- lm(transAcc ~ vowel_ED_b, data = AcousticData)
summary(OT_cornDisp)
Call:
lm(formula = transAcc ~ vowel_ED_b, data = AcousticData)
Residuals:
Min 1Q Median 3Q Max
-53.949 -10.348 4.268 14.982 25.903
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 6.227 18.611 0.335 0.73977
vowel_ED_b 25.564 8.946 2.857 0.00689 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 21.98 on 38 degrees of freedom
Multiple R-squared: 0.1769, Adjusted R-squared: 0.1552
F-statistic: 8.165 on 1 and 38 DF, p-value: 0.006891
VAS_cornDisp <- lm(VAS ~ vowel_ED_b, data = AcousticData)
summary(VAS_cornDisp)
Call:
lm(formula = VAS ~ vowel_ED_b, data = AcousticData)
Residuals:
Min 1Q Median 3Q Max
-52.146 -13.413 7.142 18.719 32.964
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -2.859 20.636 -0.139 0.8905
vowel_ED_b 26.819 9.920 2.704 0.0102 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 24.37 on 38 degrees of freedom
Multiple R-squared: 0.1613, Adjusted R-squared: 0.1393
F-statistic: 7.31 on 1 and 38 DF, p-value: 0.0102
gtData <- AcousticData %>%
rbind(.,AcousticData %>%
dplyr::mutate(Etiology = "All Etiologies")) %>%
rbind(.,AcousticData %>%
rbind(.,AcousticData %>%
dplyr::mutate(Etiology = "All Etiologies")) %>%
dplyr::mutate(Sex = "All")) %>%
dplyr::mutate(Sex = as.factor(Sex),
Etiology = as.factor(Etiology)) %>%
dplyr::group_by(Sex, Etiology) %>%
dplyr::summarize(VSA_mean = mean(VSA_b, na.rm =T), VSA_sd = sd(VSA_b, na.rm = T),
Disp_mean = mean(vowel_ED_b, na.rm =T), Disp_sd = sd(vowel_ED_b, na.rm =T),
Hull_mean = mean(Hull_b, na.rm =T), Hull_sd = sd(Hull_b, na.rm =T),
VSD25_mean = mean(Hull_bVSD_25, na.rm =T), VSD25_sd = sd(Hull_bVSD_25, na.rm =T),
VSD50_mean = mean(Hull_bVSD_50, na.rm =T), VSD50_sd = sd(Hull_bVSD_50, na.rm =T),
VSD75_mean = mean(Hull_bVSD_75, na.rm =T), VSD75_sd = sd(Hull_bVSD_75, na.rm =T),
VAS_mean = mean(VAS, na.rm =T), VAS_sd = sd(VAS, na.rm =T),
OT_mean = mean(transAcc, na.rm =T), OT_sd = sd(transAcc, na.rm =T)) %>%
pivot_longer(cols = VSA_mean:OT_sd, names_to = "Measure",
values_to = "Value") %>%
dplyr::mutate(Value = round(Value, digits = 2),
meanSD = ifelse(grepl("_mean",Measure),"M","sd"),
Measure = gsub("_mean","",Measure),
Measure = gsub("_sd","",Measure),
Etiology = paste(Etiology,meanSD, sep = "_"),
Sex = case_when(
Sex == "All" ~ "All Speakers",
Sex == "M" ~ "Male Speakers",
Sex == "F" ~ "Female Speakers"
)) %>%
dplyr::select(!meanSD) %>%
pivot_wider(names_from = Etiology, values_from = "Value") %>%
dplyr::filter(Measure != "VSD50")
`summarise()` has grouped output by 'Sex'. You can override using the `.groups` argument.
gtData %>%
gt::gt(
rowname_col = "Measure",
groupname_col = "Sex",
) %>%
fmt_number(
columns = 'All Etiologies_M':PD_sd,
decimals = 2
) %>%
tab_spanner(
label = "All Etiologies",
columns = c('All Etiologies_M', 'All Etiologies_sd')
) %>%
tab_spanner(
label = "ALS",
columns = c(ALS_M, ALS_sd)
) %>%
tab_spanner(
label = "PD",
columns = c(PD_M, PD_sd)
) %>%
tab_spanner(
label = "HD",
columns = c(HD_M, HD_sd)
) %>%
tab_spanner(
label = "Ataxic",
columns = c(Ataxic_M, Ataxic_sd)
) %>%
gt::cols_move_to_start(
columns = c('All Etiologies_M','All Etiologies_sd')
) %>%
row_group_order(
groups = c("All Speakers", "Female Speakers", "Male Speakers")
) %>%
cols_label(
'All Etiologies_M' = "M",
'All Etiologies_sd' = "SD",
ALS_M = "M",
ALS_sd = "SD",
PD_M = "M",
PD_sd = "SD",
HD_M = "M",
HD_sd = "SD",
Ataxic_M = "M",
Ataxic_sd = "SD"
) %>%
gtsave("DescriptivesTable.html", path = "Tables")
sjPlot::tab_model(OT_Model1,
OT_Model2,
OT_Model3b,
OT_Model4,
OT_Model5,
OT_Model_final,
show.ci = F,
p.style = "stars",
file = "Tables/OT Models.html")
sjPlot::tab_model(VAS_Model1,
VAS_Model2,
VAS_Model3,
VAS_Model4,
VAS_Model5,
VAS_Model_final,
show.ci = F,
p.style = "stars",
file = "Tables/VAS Models.html")
sjPlot::tab_model(OT_VAS_model,OT_VAS_final,
show.ci = F,
show.reflvl = TRUE,
p.style = "stars",
file = "Tables/OT and VAS Comparison.html")
formantColor <- "grey"
formantAlpha <- .95
lineColor <- "white"
lineAlpha <- .8
vowelData <- rio::import("Prepped Data/Vowel Data.csv") %>%
dplyr::filter(Speaker == "AF8")
Pitch_PRAAT <- list.files(path = paste("Prepped Data/Example Data/", sep = ""),
pattern = ".Pitch", ignore.case = T) %>%
paste("Prepped Data/Example Data/",., sep = "") %>%
read.delim(., header = F) %>%
dplyr::rename(Pitch = V1) %>%
dplyr::mutate(Pitch = gsub("--undefined--",NA,Pitch),
Pitch = as.numeric(Pitch))
Formants_PRAAT <- list.files(path = paste("Prepped Data/Example Data/", sep = ""),
pattern = "_Formant", ignore.case = T) %>%
paste("Prepped Data/Example Data/",., sep = "") %>%
read.delim(., header = T) %>%
dplyr::select(!c(nformants, B1.Hz., B2.Hz., B3.Hz., F4.Hz., B4.Hz., F5.Hz., B5.Hz.)) %>%
dplyr::rename(Time_s = time.s.,
F1_Hz = F1.Hz.,
F2_Hz = F2.Hz.,
F3_Hz = F3.Hz.) %>%
dplyr::mutate(F1_Hz = ifelse(F1_Hz == 0, NA, F1_Hz),
F2_Hz = ifelse(F2_Hz == 0, NA, F2_Hz),
F3_Hz = ifelse(F3_Hz == 0, NA, F3_Hz)) %>%
dplyr::mutate(F1_Hz = as.numeric(F1_Hz),
F2_Hz = as.numeric(F2_Hz),
F3_Hz = suppressWarnings(as.numeric(F3_Hz)),
Time_ms = Time_s / 1000,
F1_kHz = F1_Hz / 1000,
F2_kHz = F2_Hz / 1000,
F3_kHz = F3_Hz / 1000) %>%
dplyr::select(!Time_s) %>%
dplyr::relocate(Time_ms, .before = F1_Hz) %>%
cbind(.,Pitch_PRAAT)
c <- 2
while(c < NROW(Formants_PRAAT)){
Formants_PRAAT$F1_Hz[c] <- ifelse(is.na(Formants_PRAAT$F1_Hz[c-1]) &&
is.na(Formants_PRAAT$F1_Hz[c+1]),
NA,
Formants_PRAAT$F1_Hz[c])
Formants_PRAAT$F2_Hz[c] <- ifelse(is.na(Formants_PRAAT$F2_Hz[c-1]) &&
is.na(Formants_PRAAT$F2_Hz[c+1]),
NA,
Formants_PRAAT$F2_Hz[c])
c <- c + 1
}
rm(c)
Formants_PRAAT <- Formants_PRAAT %>%
dplyr::filter(!is.na(Pitch)) %>%
dplyr::mutate(F1_mad = (abs(F1_Hz - median(F1_Hz))/ mad(F1_Hz, constant = 1.4826)) > 2.5,
F2_mad = (abs(F2_Hz - median(F2_Hz))/ mad(F2_Hz, constant = 1.4826)) > 2.5) %>%
dplyr::filter(F1_mad == FALSE & F2_mad == FALSE) %>%
dplyr::mutate(mDist = mahalanobis(cbind(.$F1_Hz, .$F2_Hz),
colMeans(cbind(.$F1_Hz, .$F2_Hz)),
cov = cov(cbind(.$F1_Hz, .$F2_Hz))),
mDist_sd = abs(scale(mDist,center = T))) %>%
dplyr::filter(mDist_sd < 2) %>%
dplyr::select(!c(F1_mad, F2_mad, mDist, mDist_sd)) %>%
dplyr::mutate(F1_z = scale(F1_Hz, center = TRUE),
F2_z = scale(F2_Hz, center = TRUE),
F3_z = scale(F3_Hz, center = TRUE),
F1_b = emuR::bark(F1_Hz),
F2_b = emuR::bark(F2_Hz),
F3_b = emuR::bark(F3_Hz))
rm(Pitch_PRAAT)
## Corner Dispersion ----
wedge <- vowelData %>%
dplyr::group_by(Vowel) %>%
dplyr::summarize(mean_F1 = mean(F1_tempMid),
mean_F2 = mean(F2_tempMid),
mean_F1_z = mean(F1_z_tempMid),
mean_F2_z = mean(F2_z_tempMid),
mean_F1_b = mean(F1_b_tempMid),
mean_F2_b = mean(F2_b_tempMid)) %>%
dplyr::filter(Vowel == "v")
corner_dis <- vowelData %>%
dplyr::filter(Vowel != "v") %>%
dplyr::group_by(Vowel) %>%
dplyr::summarize(mean_F1 = mean(F1_tempMid),
mean_F2 = mean(F2_tempMid),
mean_F1_z = mean(F1_z_tempMid),
mean_F2_z = mean(F2_z_tempMid),
mean_F1_b = mean(F1_b_tempMid),
mean_F2_b = mean(F2_b_tempMid)) %>%
dplyr::mutate(Vowel_ED = sqrt((mean_F1-wedge$mean_F1)^2 + (mean_F2-wedge$mean_F2)^2),
Vowel_ED_z = sqrt((mean_F1_z-wedge$mean_F1_z)^2 + (mean_F2_z-wedge$mean_F2_z)^2),
Vowel_ED_b = sqrt((mean_F1_b-wedge$mean_F1_b)^2 + (mean_F2_b-wedge$mean_F2_b)^2))
# Plot Corner Dispersion
# Changing to IPA symbols
corner_dis <- corner_dis %>%
dplyr::mutate(Vowel = dplyr::case_when(
Vowel == "ae" ~ "æ",
TRUE ~ Vowel
))
wedge <- wedge %>%
dplyr::mutate(Vowel = case_when(
Vowel == "v" ~ "ʌ",
TRUE ~ Vowel
))
CDplot <- ggplot(aes(x=F2_b,
y=F1_b),
data = Formants_PRAAT,
inherit.aes = FALSE) +
geom_point(shape = 21,
alpha = formantAlpha,
color = formantColor) +
geom_line(aes(x = mean_F2_b,
y = mean_F1_b),
data = corner_dis %>%
dplyr::select(Vowel:mean_F2_b) %>%
dplyr::filter(Vowel == "i") %>%
rbind(.,wedge),
color = lineColor,
size = 1.5,
alpha = lineAlpha) +
geom_line(aes(x = mean_F2_b,
y = mean_F1_b),
data = corner_dis %>%
dplyr::select(Vowel:mean_F2_b) %>%
dplyr::filter(Vowel == "a") %>%
rbind(.,wedge),
color = lineColor,
size = 1.5,
alpha = lineAlpha) +
geom_line(aes(x = mean_F2_b,
y = mean_F1_b),
data = corner_dis %>%
dplyr::select(Vowel:mean_F2_b) %>%
dplyr::filter(Vowel == "æ") %>%
rbind(.,wedge),
color = lineColor,
size = 1.5,
alpha = lineAlpha) +
geom_line(aes(x = mean_F2_b,
y = mean_F1_b),
data = corner_dis %>%
dplyr::select(Vowel:mean_F2_b) %>%
dplyr::filter(Vowel == "u") %>%
rbind(.,wedge),
color = lineColor,
size = 1.5,
alpha = lineAlpha) +
geom_point(aes(x = mean_F2_b,
y = mean_F1_b,
color = Vowel),
data = corner_dis %>%
dplyr::select(Vowel:mean_F2_b) %>%
rbind(.,wedge),
inherit.aes = FALSE,
size = 5) +
scale_y_reverse() +
scale_x_reverse() +
theme_classic() + labs(title = paste("Corner Dispersion")) + xlab("F2 (Bark)") + ylab("F1 (Bark)") +
theme(plot.title = element_text(hjust = 0.5),
aspect.ratio = 1) +
scale_color_manual(values = c("a" = "#1AAD77",
"æ" = "#1279B5",
"i" = "#FFBF00",
"u" = "#FD7853",
"ʌ" = "#BF3178"))
CDplot
rm(corner_dis, wedge)
## Vowel Space Area ----
VSA_coords <- vowelData %>%
dplyr::filter(Vowel != "v") %>%
dplyr::group_by(Vowel) %>%
dplyr::summarize(mean_F1 = mean(F1_tempMid),
mean_F2 = mean(F2_tempMid),
mean_F1_z = mean(F1_z_tempMid),
mean_F2_z = mean(F2_z_tempMid),
mean_F1_b = mean(F1_b_tempMid),
mean_F2_b = mean(F2_b_tempMid))
### Plotting VSA
VSA_coords <- VSA_coords %>%
dplyr::mutate(Vowel = case_when(
Vowel == "ae" ~ "æ",
TRUE ~ Vowel
))
VSAplot <- ggplot(aes(x = F2_b,
y = F1_b),
data = Formants_PRAAT,
inherit.aes = FALSE) +
geom_point(shape = 21,
alpha = formantAlpha,
color = formantColor) +
geom_polygon(aes(x = mean_F2_b,
y = mean_F1_b),
data = VSA_coords,
alpha = lineAlpha,
color = lineColor,
fill=NA,
size = 1.5) +
geom_point(aes(x = mean_F2_b,
y = mean_F1_b,
color = Vowel),
data = VSA_coords,
inherit.aes = FALSE,
size = 5) +
scale_y_reverse() +
scale_x_reverse() +
guides(color = FALSE) +
theme_classic() + labs(title = "VSA") + xlab("F2 (Bark)") + ylab("F1 (Bark)") +
theme(plot.title = element_text(hjust = 0.5),
aspect.ratio = 1) +
scale_color_manual(values = c("a" = "#1AAD77",
"æ" = "#1279B5",
"i" = "#FFBF00",
"u" = "#FD7853"))
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
VSAplot
rm(VSA_coords)
## Hull ----
### Plotting Hull
convexCoords <- Formants_PRAAT %>%
dplyr::select(F1_b, F2_b) %>%
as.matrix() %>%
grDevices::chull()
convex <- Formants_PRAAT %>%
slice(convexCoords)
hullPlot <- ggplot(aes(F2_b, F1_b),
data = Formants_PRAAT) +
geom_point(shape = 21,
alpha = formantAlpha,
color = formantColor) +
geom_polygon(data = convex,
alpha = .5,
color = "#1279B5",
fill = NA,
size = 1.5) +
scale_y_reverse() +
scale_x_reverse() +
theme_classic() + labs(title = expression("VSA"[Hull])) +
xlab("F2 (Bark)") +
ylab("F1 (Bark)") +
theme(plot.title = element_text(hjust = 0.5),
aspect.ratio = 1)
hullPlot
## Vowel Space Density ----
# Bark Normalized Density ----
# selecting the bandwidth
H_hpi <- ks::Hpi(x = Formants_PRAAT[,c("F2_b","F1_b")], pilot = "samse", pre = "scale", binned = T)
# compute 2d kde
k <- kde(x = Formants_PRAAT[,c("F2_b","F1_b")],
H = H_hpi,
binned = T,
gridsize = 250)
#density <- k[["estimate"]]
# Before we can plot the density estimate we need to melt it into long format
mat.melted <- data.table::melt(k$estimate)
The melt generic in data.table has been passed a matrix and will attempt to redirect to the relevant reshape2 method; please note that reshape2 is deprecated, and this redirection is now deprecated as well. To continue using melt methods from reshape2 while both libraries are attached, e.g. melt.list, you can prepend the namespace like reshape2::melt(k$estimate). In the next version, this warning will become an error.
names(mat.melted) <- c("x", "y", "density")
# We need to add two more colums to preserve the axes units
mat.melted$F2.b <- rep(k$eval.points[[1]], times = nrow(k$estimate))
mat.melted$F1.b <- rep(k$eval.points[[2]], each = nrow(k$estimate))
mat.melted$density <- scales::rescale(mat.melted$density, to = c(0, 1))
# VSD - 25
nVSD_25 <- mat.melted %>%
dplyr::filter(density > .25) %>%
dplyr::select(F2.b,F1.b, density) %>%
dplyr::rename(Density = density)
convexCoords <- nVSD_25 %>%
dplyr::select(F2.b, F1.b) %>%
as.matrix() %>%
#grDevices::xy.coords() %>%
grDevices::chull()
nconvex_25 <- nVSD_25 %>%
slice(convexCoords)
# VSD - 75
nVSD_75 <- mat.melted %>%
dplyr::filter(density > .75) %>%
dplyr::select(F2.b,F1.b, density) %>%
dplyr::rename(Density = density)
convexCoords <- nVSD_75 %>%
dplyr::select(F2.b, F1.b) %>%
as.matrix() %>%
grDevices::chull()
nconvex_75 <- nVSD_75 %>%
slice(convexCoords)
# Plotting Z Normalized VSD
rf <- colorRampPalette(rev(RColorBrewer::brewer.pal(11, "Spectral")))
r <- rf(32)
plotData <- mat.melted %>%
dplyr::rename(Density = density) %>%
dplyr::mutate(VSDlabel = dplyr::case_when(
Density < .25 ~ "none",
Density > .25 && Density < .75 ~ "VSD25",
TRUE ~ "VSD75"
))
geom.text.size <- 2
VSDplot <- ggplot(data = plotData,
aes(x = F2.b,
y = F1.b,
fill = Density)) +
geom_tile() +
scale_fill_viridis_c() +
scale_x_reverse(expand = c(0, 0),
breaks = round(seq(min(mat.melted$F2.b),
max(mat.melted$F2.b), by = 2))) +
scale_y_reverse(expand = c(0, 0),
breaks = round(seq(min(mat.melted$F1.b),
max(mat.melted$F1.b), by = 2))) +
ylab("F1 (Bark)") + xlab("F2 (Bark)") +
labs(title = "VSD") +
theme(plot.title = element_text(hjust = 0.5),
aspect.ratio = 1) +
geom_polygon(data = nconvex_25, alpha = lineAlpha, color = lineColor, size = 1.5, fill = NA, linetype = 2) +
geom_polygon(data = nconvex_75, alpha = lineAlpha, color = lineColor, size = 1.5, fill = NA, linetype = 1) +
# VSD 25 Label
annotate(geom = "curve",
x = 6.9, y = 1.7+.5,
xend = 8.5, yend = 3.5,
curvature = -.3,
arrow = arrow(length = unit(2, "mm")),
color = "white") +
annotate(geom = "text",
x = 7.5, y = 1.7,
label = deparse(bquote(VSD[25])),
hjust = "center",
color = "white",
parse=TRUE) +
# VSD 75 Label
annotate(geom = "curve",
x = 7.5, y = 7.5-.5,
xend = 11.35, yend = 5.5,
curvature = .3,
arrow = arrow(length = unit(2, "mm")),
color = "white") +
annotate(geom = "text",
x = 7, y = 7.5,
label = deparse(bquote(VSD[75])),
hjust = "center",
color = "white",
parse = TRUE)
VSDplot
# Combined Plot
row1 <- VSAplot + CDplot +
patchwork::plot_layout(guides = 'collect',
ncol = 2) & theme(legend.position = 'right')
row2 <- hullPlot + VSDplot +
patchwork::plot_layout(guides = 'collect',
ncol = 2) & theme(legend.position = 'right')
measuresPlot <- row1 / row2 + patchwork::plot_layout(heights = c(1/2, 1/2), byrow = FALSE)
measuresPlot
rm(row1, row2)
ggsave(filename = "Plots/Measures.png",
plot = measuresPlot,
height = 5.5,
width = 6,
scale = .8)
formantAlpha <- .20
myPal <- c("#1279B5","#2D2D37")
Pitch_PRAAT <- list.files(path = paste("Prepped Data/Example Data/", sep = ""),
pattern = ".Pitch", ignore.case = T) %>%
paste("Prepped Data/Example Data/",., sep = "") %>%
read.delim(., header = F) %>%
dplyr::rename(Pitch = V1) %>%
dplyr::mutate(Pitch = gsub("--undefined--",NA,Pitch),
Pitch = as.numeric(Pitch))
Formants_PRAAT <- list.files(path = paste("Prepped Data/Example Data/", sep = ""),
pattern = "_Formant", ignore.case = T) %>%
paste("Prepped Data/Example Data/",., sep = "") %>%
read.delim(., header = T) %>%
dplyr::select(!c(nformants, B1.Hz., B2.Hz., B3.Hz., F4.Hz., B4.Hz., F5.Hz., B5.Hz.)) %>%
dplyr::rename(Time_s = time.s.,
F1_Hz = F1.Hz.,
F2_Hz = F2.Hz.,
F3_Hz = F3.Hz.) %>%
dplyr::mutate(F1_Hz = ifelse(F1_Hz == 0, NA, F1_Hz),
F2_Hz = ifelse(F2_Hz == 0, NA, F2_Hz),
F3_Hz = ifelse(F3_Hz == 0, NA, F3_Hz)) %>%
dplyr::mutate(F1_Hz = as.numeric(F1_Hz),
F2_Hz = as.numeric(F2_Hz),
F3_Hz = suppressWarnings(as.numeric(F3_Hz)),
Time_ms = Time_s / 1000,
F1_kHz = F1_Hz / 1000,
F2_kHz = F2_Hz / 1000,
F3_kHz = F3_Hz / 1000,
F1_b = emuR::bark(F1_Hz),
F2_b = emuR::bark(F2_Hz),
F3_b = emuR::bark(F3_Hz)) %>%
dplyr::select(!Time_s) %>%
dplyr::relocate(Time_ms, .before = F1_Hz) %>%
cbind(.,Pitch_PRAAT)
c <- 2
while(c < NROW(Formants_PRAAT)){
Formants_PRAAT$F1_Hz[c] <- ifelse(is.na(Formants_PRAAT$F1_Hz[c-1]) &&
is.na(Formants_PRAAT$F1_Hz[c+1]),
NA,
Formants_PRAAT$F1_Hz[c])
Formants_PRAAT$F2_Hz[c] <- ifelse(is.na(Formants_PRAAT$F2_Hz[c-1]) &&
is.na(Formants_PRAAT$F2_Hz[c+1]),
NA,
Formants_PRAAT$F2_Hz[c])
c <- c + 1
}
rm(c)
# Raw Formants ----
f1 <- ggplot(aes(x=F2_b,
y=F1_b),
data = Formants_PRAAT) +
geom_point(shape = 21, color = myPal[2]) +
scale_y_reverse(limits = c(16,0)) +
scale_x_reverse(limits = c(19,3)) +
scale_color_manual(values = myPal) +
theme_classic() + labs(title = paste("Raw Formant\nValues")) + xlab("F2 (Bark)") + ylab("F1 (Bark)") +
theme(plot.title = element_text(hjust = 0.5),
aspect.ratio = 1,
legend.title = element_blank(),
legend.text = element_text(size=12))
# Step #1: Voiced Segments ----
plotData <- Formants_PRAAT %>%
dplyr::mutate(isOutlier = case_when(
is.na(Pitch) ~ "Removed",
TRUE ~ "Retained"
))
f2 <- ggplot(data = plotData,
aes(x = F2_b,
y = F1_b,
color = isOutlier)) +
geom_point(shape = 21, data = plotData %>%
dplyr::filter(isOutlier == "Removed")) +
geom_point(shape = 21, data = plotData %>%
dplyr::filter(isOutlier == "Retained")) +
scale_y_reverse(limits = c(16,0)) +
scale_x_reverse(limits = c(19,3)) +
scale_color_manual(values = myPal) +
theme_classic() + labs(title = paste("Voiced Segments")) +
xlab("F2 (Bark)") +
ylab("F1 (Bark)") +
theme(plot.title = element_text(hjust = 0.5),
aspect.ratio = 1,
legend.title = element_blank(),
legend.text = element_text(size=12))
# Step 2: MAD ----
plotData <- Formants_PRAAT %>%
dplyr::filter(!is.na(Pitch)) %>%
dplyr::mutate(F1_mad = (abs(F1_Hz - median(F1_Hz))/ mad(F1_Hz, constant = 1.4826)) > 2.5,
F2_mad = (abs(F2_Hz - median(F2_Hz))/ mad(F2_Hz, constant = 1.4826)) > 2.5,
isOutlier = case_when(
F1_mad == TRUE | F2_mad == TRUE ~ "Removed",
TRUE ~ "Retained"
))
f3 <- ggplot(data = plotData,
aes(x = F2_b,
y = F1_b,
color = isOutlier)) +
geom_point(shape = 21, data = plotData %>%
dplyr::filter(isOutlier == "Removed")) +
geom_point(shape = 21, data = plotData %>%
dplyr::filter(isOutlier == "Retained")) +
scale_y_reverse(limits = c(16,0)) +
scale_x_reverse(limits = c(19,3)) +
scale_color_manual(values = myPal) +
theme_classic() +
labs(title = paste("Median Absolute\nDeviation")) +
xlab("F2 (Bark)") +
ylab("F1 (Bark)") +
theme(plot.title = element_text(hjust = 0.5),
aspect.ratio = 1,
legend.title = element_blank(),
legend.text = element_text(size=12))
# Step 3: Mahalanhobis Distance ----
plotData <- Formants_PRAAT %>%
dplyr::filter(!is.na(Pitch)) %>%
dplyr::mutate(F1_mad = (abs(F1_Hz - median(F1_Hz))/ mad(F1_Hz, constant = 1.4826)) > 2.5,
F2_mad = (abs(F2_Hz - median(F2_Hz))/ mad(F2_Hz, constant = 1.4826)) > 2.5) %>%
dplyr::filter(F1_mad == FALSE & F2_mad == FALSE) %>%
dplyr::mutate(mDist = mahalanobis(cbind(.$F1_Hz, .$F2_Hz),
colMeans(cbind(.$F1_Hz, .$F2_Hz)),
cov = cov(cbind(.$F1_Hz, .$F2_Hz))),
mDist_sd = abs(scale(mDist,center = T)),
isOutlier = case_when(
mDist_sd < 2 ~ "Retained",
TRUE ~ "Removed"
))
f4 <- ggplot(data = plotData,
aes(x = F2_b,
y = F1_b,
color = isOutlier)) +
geom_point(shape = 21, data = plotData %>%
dplyr::filter(isOutlier == "Removed")) +
geom_point(shape = 21, data = plotData %>%
dplyr::filter(isOutlier == "Retained")) +
scale_y_reverse(limits = c(16,0)) +
scale_x_reverse(limits = c(19,3)) +
scale_color_manual(values = myPal) +
theme_classic() + labs(title = paste("Mahalanobis\nDistance")) + xlab("F2 (Bark)") + ylab("F1 (Bark)") +
theme(plot.title = element_text(hjust = 0.5),
aspect.ratio = 1,
legend.title = element_blank(),
legend.text = element_text(size=12))
# Final Formants ----
plotData <- Formants_PRAAT %>%
dplyr::filter(!is.na(Pitch)) %>%
dplyr::mutate(F1_mad = (abs(F1_Hz - median(F1_Hz))/ mad(F1_Hz, constant = 1.4826)) > 2.5,
F2_mad = (abs(F2_Hz - median(F2_Hz))/ mad(F2_Hz, constant = 1.4826)) > 2.5) %>%
dplyr::filter(F1_mad == FALSE & F2_mad == FALSE) %>%
dplyr::mutate(mDist = mahalanobis(cbind(.$F1_Hz, .$F2_Hz),
colMeans(cbind(.$F1_Hz, .$F2_Hz)),
cov = cov(cbind(.$F1_Hz, .$F2_Hz))),
mDist_sd = abs(scale(mDist,center = T))) %>%
dplyr::filter(mDist_sd < 2)
f5 <- ggplot(aes(x=F2_b,
y=F1_b),
data = plotData,
inherit.aes = FALSE) +
geom_point(shape = 21, color = myPal[2]) +
scale_y_reverse(limits = c(16,0)) +
scale_x_reverse(limits = c(19,3)) +
theme_classic() + labs(title = paste("Final Formant\nTrajectories")) + xlab("F2 (Bark)") + ylab("F1 (Bark)") +
theme(plot.title = element_text(hjust = 0.5),
aspect.ratio = 1, legend.title = element_blank())
# Comibing plots
filteredPlot <- f1 + f2 + f3 + f4 + f5 + patchwork::guide_area() +
patchwork::plot_layout(guides = 'collect',
ncol = 3) +
patchwork::plot_annotation(tag_levels = 'A')
filteredPlot
ggsave(plot = filteredPlot, "Plots/Filtered Formants.png",
height = 6,
width = 8,
units = "in",
scale = .9)
NA
plotData_Int <- AcousticData %>%
dplyr::filter(!grepl("_rel", Speaker)) %>%
dplyr::group_by(Speaker) %>%
dplyr::mutate(segMin = base::min(VAS, transAcc),
segMax = base::max(VAS, transAcc),
ratingAvg = mean(VAS, transAcc, na.rm = T),
Speaker = as.factor(Speaker),
Etiology = case_when(
Etiology == "Ataxic" ~ "Ataxia",
TRUE ~ as.character(Etiology)
),
Etiology = as.factor(Etiology)) %>%
arrange(segMax)
my_pal <- c("#f26430", "#272D2D","#256eff")
# With a bit more style
plot_Int <- ggplot(plotData_Int) +
geom_segment(aes(x = fct_inorder(Speaker),
xend = Speaker,
y = segMin,
yend = segMax,
color = Etiology)) +
geom_point(aes(x = Speaker,
y = VAS,
color = Etiology),
#color = my_pal[1],
size = 3,
shape = 19) +
geom_point(aes(x = Speaker,
y = transAcc,
color = Etiology),
#color = my_pal[2],
size = 3,
shape = 15) +
coord_flip()+
theme_classic() +
theme(
legend.position = "none",
panel.border = element_blank(),
) +
xlab("") +
ylab("Speech Intelligibility") +
ggtitle("Speech Intelligibility") +
ylim(c(0,100))
myPal <- c("#1AAD77", "#1279B5", "#FFBF00", "#FD7853", "#BF3178")
myShapes <- c(16, 18, 17, 15)
OT_VASscatter <- ggplot(plotData_Int,
aes(x = VAS,
y = transAcc,
color = Etiology,
shape = Etiology,
linetype = Etiology)) +
geom_point() +
geom_smooth(method = "lm", se = F) +
geom_abline(intercept = 0, slope = 1) +
coord_cartesian(xlim = c(0,100), ylim = c(0,100)) +
labs(x = "Intelligibility (VAS)", y = "Intelligibility (OT)") +
scale_color_manual(values = myPal) +
scale_shape_manual(values = myShapes) +
theme_classic() +
theme(aspect.ratio=1,
legend.position="right")
ggsave(filename = "Plots/OT and VAS Scatterplot.png",
plot = OT_VASscatter,
height = 3.25,
width = 4,
units = "in",
scale = 1)
rm(scatter1, scatter2, combinedScatter)
modelFigureData <- AcousticData %>%
dplyr::filter(!grepl("_rel",Speaker)) %>%
dplyr::select(Speaker, Etiology, Sex, VSA_b, vowel_ED_b, Hull_b, Hull_bVSD_25, Hull_bVSD_75, VAS, transAcc) %>%
dplyr::mutate(Speaker = as.factor(Speaker),
Etiology = as.factor(Etiology),
Sex = as.factor(Sex)) %>%
tidyr::pivot_longer(cols = VAS:transAcc, names_to = "IntType", values_to = "Int") %>%
dplyr::mutate(IntType = case_when(
IntType == "transAcc" ~ "OT",
TRUE ~ "VAS"
),
IntType = as.factor(IntType))
ylabel <- "Intelligibility"
myPal <- c("#2D2D37", "#1279B5")
myPalShape <- c(19, 1)
VSA <- modelFigureData %>%
ggplot() +
aes(x = VSA_b,
y = Int,
color = IntType,
shape = IntType,
linetype = IntType) +
geom_point() +
geom_smooth(method = "lm", se = T, fill = "light grey") +
geom_smooth(method = "lm", se = F) +
xlab(expression("VSA (Bark"^2*")")) +
ylab(ylabel) +
coord_cartesian(ylim = c(0,100)) +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5, face = "bold"),
aspect.ratio=1) +
scale_color_manual(values = myPal) +
scale_shape_manual(values = myPalShape) +
labs(color="Intelligibility Type",
shape = "Intelligibility Type",
linetype = "Intelligibility Type")
disp <- modelFigureData %>%
ggplot() +
aes(x = vowel_ED_b,
y = Int,
color = IntType,
shape = IntType,
linetype = IntType) +
geom_point() +
geom_smooth(method = "lm", se = T, fill = "light grey") +
geom_smooth(method = "lm", se = F) +
xlab("Corner Dispersion (Bark)") +
ylab(ylabel) +
coord_cartesian(ylim = c(0,100)) +
theme_classic() +
theme(aspect.ratio=1) +
scale_color_manual(values = myPal) +
scale_shape_manual(values = myPalShape) +
labs(color="Intelligibility Type",
shape = "Intelligibility Type",
linetype = "Intelligibility Type")
Hull <- modelFigureData %>%
ggplot() +
aes(x = Hull_b,
y = Int,
color = IntType,
shape = IntType,
linetype = IntType) +
geom_point() +
geom_smooth(method = "lm", se = T, fill = "light grey") +
geom_smooth(method = "lm", se = F) +
xlab(expression("VSA"[Hull]*" (Bark"^2*")")) +
ylab(ylabel) +
coord_cartesian(ylim = c(0,100)) +
theme_classic() +
theme(aspect.ratio=1) + theme(legend.position = "none") +
scale_color_manual(values = myPal) +
scale_shape_manual(values = myPalShape) +
labs(color="Intelligibility Type",
shape = "Intelligibility Type",
linetype = "Intelligibility Type")
vsd25 <- modelFigureData %>%
ggplot() +
aes(x = Hull_bVSD_25,
y = Int,
color = IntType,
shape = IntType,
linetype = IntType) +
geom_point() +
geom_smooth(method = "lm", se = T, fill = "light grey") +
geom_smooth(method = "lm", se = F) +
xlab(expression("VSD"[25]*" (Bark"^2*")")) +
ylab(ylabel) +
coord_cartesian(ylim = c(0,100)) +
theme_classic() +
theme(aspect.ratio=1) + theme(legend.position = "none") +
scale_color_manual(values = myPal) +
scale_shape_manual(values = myPalShape) +
labs(color="Intelligibility Type",
shape = "Intelligibility Type",
linetype = "Intelligibility Type")
vsd75 <- modelFigureData %>%
ggplot() +
aes(x = Hull_bVSD_75,
y = Int,
color = IntType,
shape = IntType,
linetype = IntType) +
geom_point() +
geom_smooth(method = "lm", se = T, fill = "light grey") +
geom_smooth(method = "lm", se = F) +
xlab(expression("VSD"[75]*" (Bark"^2*")")) +
ylab(ylabel) +
coord_cartesian(ylim = c(0,100)) +
theme_classic() +
theme(aspect.ratio=1) + theme(legend.position = "none") +
scale_color_manual(values = myPal) +
scale_shape_manual(values = myPalShape) +
labs(color="Intelligibility Type",
shape = "Intelligibility Type",
linetype = "Intelligibility Type")
# Creating OT Scatterplot Figure
scatter <- VSA + disp + patchwork::guide_area() + Hull + vsd25 + vsd75 +
patchwork::plot_layout(guides = 'collect',
ncol = 3) & theme(legend.position = "right")
scatter
ggsave("Plots/ModelFigure.png", scatter,
height = 4,
width = 6,
units = "in",
scale = 1.1)
text_x <- 12.5
text_y <- 8.5
xlims <- c(16,5)
ylims <- c(9,1)
# Hull - 2 SD ----
Pitch_PRAAT <- list.files(path = paste("Prepped Data/Example Data", sep = ""),
pattern = ".Pitch", ignore.case = T) %>%
paste("Prepped Data/Example Data/",., sep = "") %>%
read.delim(., header = F) %>%
dplyr::rename(Pitch = V1) %>%
dplyr::mutate(Pitch = gsub("--undefined--",NA,Pitch),
Pitch = as.numeric(Pitch))
Formants_PRAAT <- list.files(path = paste("Prepped Data/Example Data", sep = ""),
pattern = "_Formant", ignore.case = T) %>%
paste("Prepped Data/Example Data/",., sep = "") %>%
read.delim(., header = T) %>%
dplyr::select(!c(nformants, B1.Hz., B2.Hz., B3.Hz., F4.Hz., B4.Hz., F5.Hz., B5.Hz.)) %>%
dplyr::rename(Time_s = time.s.,
F1_Hz = F1.Hz.,
F2_Hz = F2.Hz.,
F3_Hz = F3.Hz.) %>%
dplyr::mutate(F1_Hz = ifelse(F1_Hz == 0, NA, F1_Hz),
F2_Hz = ifelse(F2_Hz == 0, NA, F2_Hz),
F3_Hz = ifelse(F3_Hz == 0, NA, F3_Hz)) %>%
dplyr::mutate(F1_Hz = as.numeric(F1_Hz),
F2_Hz = as.numeric(F2_Hz),
F3_Hz = suppressWarnings(as.numeric(F3_Hz)),
Time_ms = Time_s / 1000,
F1_kHz = F1_Hz / 1000,
F2_kHz = F2_Hz / 1000,
F3_kHz = F3_Hz / 1000,
F1_b = emuR::bark(F1_Hz),
F2_b = emuR::bark(F2_Hz),
F3_b = emuR::bark(F3_Hz)) %>%
dplyr::select(!Time_s) %>%
dplyr::relocate(Time_ms, .before = F1_Hz) %>%
cbind(.,Pitch_PRAAT) %>%
dplyr::filter(!is.na(Pitch)) %>%
dplyr::mutate(F1_mad = (abs(F1_Hz - median(F1_Hz))/ mad(F1_Hz, constant = 1.4826)) > 2.5,
F2_mad = (abs(F2_Hz - median(F2_Hz))/ mad(F2_Hz, constant = 1.4826)) > 2.5) %>%
dplyr::filter(F1_mad == FALSE & F2_mad == FALSE) %>%
dplyr::mutate(mDist = mahalanobis(cbind(.$F1_Hz, .$F2_Hz),
colMeans(cbind(.$F1_Hz, .$F2_Hz)),
cov = cov(cbind(.$F1_Hz, .$F2_Hz))),
mDist_sd = abs(scale(mDist,center = T))) %>%
#dplyr::mutate(mDistOutlier = (stats::pchisq(mDist, df=1, lower.tail=FALSE)) < .001) %>%
dplyr::filter(mDist_sd < 2)
c <- 2
while(c < NROW(Formants_PRAAT)){
Formants_PRAAT$F1_Hz[c] <- ifelse(is.na(Formants_PRAAT$F1_Hz[c-1]) &&
is.na(Formants_PRAAT$F1_Hz[c+1]),
NA,
Formants_PRAAT$F1_Hz[c])
Formants_PRAAT$F2_Hz[c] <- ifelse(is.na(Formants_PRAAT$F2_Hz[c-1]) &&
is.na(Formants_PRAAT$F2_Hz[c+1]),
NA,
Formants_PRAAT$F2_Hz[c])
c <- c + 1
}
rm(c)
Hull_b <- cHull(Formants_PRAAT$F1_b, Formants_PRAAT$F2_b)
### Plotting Hull
convexCoords <- Formants_PRAAT %>%
dplyr::select(F1_b, F2_b) %>%
as.matrix() %>%
grDevices::chull()
convex <- Formants_PRAAT %>%
slice(convexCoords)
hullPlot_2 <- ggplot(aes(F2_b, F1_b),
data = Formants_PRAAT) +
geom_point(shape = 21) +
geom_polygon(data = convex,
alpha = .5,
color = "#1279B5",
fill = NA,
size = 1.5) +
annotate("text", x = text_x, y = text_y, label = paste("Hull =",round(Hull_b,2))) +
scale_y_reverse() +
scale_x_reverse() +
xlim(xlims) +
ylim(ylims) +
theme_classic() + labs(title = paste("2 SD")) + xlab("F2 (Bark)") + ylab("F1 (Bark)") +
theme(plot.title = element_text(hjust = 0.5),
aspect.ratio = 1)
Scale for 'x' is already present. Adding another scale for 'x', which will replace the
existing scale.
Scale for 'y' is already present. Adding another scale for 'y', which will replace the
existing scale.
hullPlot_2
# Hull - 2.5 SD ----
Pitch_PRAAT <- list.files(path = paste("Prepped Data/Example Data", sep = ""),
pattern = ".Pitch", ignore.case = T) %>%
paste("Prepped Data/Example Data/",., sep = "") %>%
read.delim(., header = F) %>%
dplyr::rename(Pitch = V1) %>%
dplyr::mutate(Pitch = gsub("--undefined--",NA,Pitch),
Pitch = as.numeric(Pitch))
Formants_PRAAT <- list.files(path = paste("Prepped Data/Example Data", sep = ""),
pattern = "_Formant", ignore.case = T) %>%
paste("Prepped Data/Example Data/",., sep = "") %>%
read.delim(., header = T) %>%
dplyr::select(!c(nformants, B1.Hz., B2.Hz., B3.Hz., F4.Hz., B4.Hz., F5.Hz., B5.Hz.)) %>%
dplyr::rename(Time_s = time.s.,
F1_Hz = F1.Hz.,
F2_Hz = F2.Hz.,
F3_Hz = F3.Hz.) %>%
dplyr::mutate(F1_Hz = ifelse(F1_Hz == 0, NA, F1_Hz),
F2_Hz = ifelse(F2_Hz == 0, NA, F2_Hz),
F3_Hz = ifelse(F3_Hz == 0, NA, F3_Hz)) %>%
dplyr::mutate(F1_Hz = as.numeric(F1_Hz),
F2_Hz = as.numeric(F2_Hz),
F3_Hz = suppressWarnings(as.numeric(F3_Hz)),
Time_ms = Time_s / 1000,
F1_kHz = F1_Hz / 1000,
F2_kHz = F2_Hz / 1000,
F3_kHz = F3_Hz / 1000,
F1_b = emuR::bark(F1_Hz),
F2_b = emuR::bark(F2_Hz),
F3_b = emuR::bark(F3_Hz)) %>%
dplyr::select(!Time_s) %>%
dplyr::relocate(Time_ms, .before = F1_Hz) %>%
cbind(.,Pitch_PRAAT) %>%
dplyr::filter(!is.na(Pitch)) %>%
dplyr::mutate(F1_mad = (abs(F1_Hz - median(F1_Hz))/ mad(F1_Hz, constant = 1.4826)) > 2.5,
F2_mad = (abs(F2_Hz - median(F2_Hz))/ mad(F2_Hz, constant = 1.4826)) > 2.5) %>%
dplyr::filter(F1_mad == FALSE & F2_mad == FALSE) %>%
dplyr::mutate(mDist = mahalanobis(cbind(.$F1_Hz, .$F2_Hz),
colMeans(cbind(.$F1_Hz, .$F2_Hz)),
cov = cov(cbind(.$F1_Hz, .$F2_Hz))),
mDist_sd = abs(scale(mDist,center = T))) %>%
#dplyr::mutate(mDistOutlier = (stats::pchisq(mDist, df=1, lower.tail=FALSE)) < .001) %>%
dplyr::filter(mDist_sd < 2.5)
c <- 2
while(c < NROW(Formants_PRAAT)){
Formants_PRAAT$F1_Hz[c] <- ifelse(is.na(Formants_PRAAT$F1_Hz[c-1]) &&
is.na(Formants_PRAAT$F1_Hz[c+1]),
NA,
Formants_PRAAT$F1_Hz[c])
Formants_PRAAT$F2_Hz[c] <- ifelse(is.na(Formants_PRAAT$F2_Hz[c-1]) &&
is.na(Formants_PRAAT$F2_Hz[c+1]),
NA,
Formants_PRAAT$F2_Hz[c])
c <- c + 1
}
rm(c)
Hull_b <- cHull(Formants_PRAAT$F1_b, Formants_PRAAT$F2_b)
### Plotting Hull
convexCoords <- Formants_PRAAT %>%
dplyr::select(F1_b, F2_b) %>%
as.matrix() %>%
grDevices::chull()
convex <- Formants_PRAAT %>%
slice(convexCoords)
hullPlot_2.5 <- ggplot(aes(F2_b, F1_b),
data = Formants_PRAAT) +
geom_point(shape = 21) +
geom_polygon(data = convex,
alpha = .5,
color = "#1279B5",
fill = NA,
size = 1.5) +
annotate("text", x = text_x, y = text_y, label = paste("Hull =",round(Hull_b,2))) +
scale_y_reverse() +
scale_x_reverse() +
xlim(xlims) +
ylim(ylims) +
theme_classic() + labs(title = paste("2.5 SD")) + xlab("F2 (Bark)") + ylab("F1 (Bark)") +
theme(plot.title = element_text(hjust = 0.5),
aspect.ratio = 1)
Scale for 'x' is already present. Adding another scale for 'x', which will replace the
existing scale.
Scale for 'y' is already present. Adding another scale for 'y', which will replace the
existing scale.
hullPlot_2.5
# Hull - 3 SD ----
Pitch_PRAAT <- list.files(path = paste("Prepped Data/Example Data", sep = ""),
pattern = ".Pitch", ignore.case = T) %>%
paste("Prepped Data/Example Data/",., sep = "") %>%
read.delim(., header = F) %>%
dplyr::rename(Pitch = V1) %>%
dplyr::mutate(Pitch = gsub("--undefined--",NA,Pitch),
Pitch = as.numeric(Pitch))
Formants_PRAAT <- list.files(path = paste("Prepped Data/Example Data", sep = ""),
pattern = "_Formant", ignore.case = T) %>%
paste("Prepped Data/Example Data/",., sep = "") %>%
read.delim(., header = T) %>%
dplyr::select(!c(nformants, B1.Hz., B2.Hz., B3.Hz., F4.Hz., B4.Hz., F5.Hz., B5.Hz.)) %>%
dplyr::rename(Time_s = time.s.,
F1_Hz = F1.Hz.,
F2_Hz = F2.Hz.,
F3_Hz = F3.Hz.) %>%
dplyr::mutate(F1_Hz = ifelse(F1_Hz == 0, NA, F1_Hz),
F2_Hz = ifelse(F2_Hz == 0, NA, F2_Hz),
F3_Hz = ifelse(F3_Hz == 0, NA, F3_Hz)) %>%
dplyr::mutate(F1_Hz = as.numeric(F1_Hz),
F2_Hz = as.numeric(F2_Hz),
F3_Hz = suppressWarnings(as.numeric(F3_Hz)),
Time_ms = Time_s / 1000,
F1_kHz = F1_Hz / 1000,
F2_kHz = F2_Hz / 1000,
F3_kHz = F3_Hz / 1000,
F1_b = emuR::bark(F1_Hz),
F2_b = emuR::bark(F2_Hz),
F3_b = emuR::bark(F3_Hz)) %>%
dplyr::select(!Time_s) %>%
dplyr::relocate(Time_ms, .before = F1_Hz) %>%
cbind(.,Pitch_PRAAT) %>%
dplyr::filter(!is.na(Pitch)) %>%
dplyr::mutate(F1_mad = (abs(F1_Hz - median(F1_Hz))/ mad(F1_Hz, constant = 1.4826)) > 2.5,
F2_mad = (abs(F2_Hz - median(F2_Hz))/ mad(F2_Hz, constant = 1.4826)) > 2.5) %>%
dplyr::filter(F1_mad == FALSE & F2_mad == FALSE) %>%
dplyr::mutate(mDist = mahalanobis(cbind(.$F1_Hz, .$F2_Hz),
colMeans(cbind(.$F1_Hz, .$F2_Hz)),
cov = cov(cbind(.$F1_Hz, .$F2_Hz))),
mDist_sd = abs(scale(mDist,center = T))) %>%
#dplyr::mutate(mDistOutlier = (stats::pchisq(mDist, df=1, lower.tail=FALSE)) < .001) %>%
dplyr::filter(mDist_sd < 3)
c <- 2
while(c < NROW(Formants_PRAAT)){
Formants_PRAAT$F1_Hz[c] <- ifelse(is.na(Formants_PRAAT$F1_Hz[c-1]) &&
is.na(Formants_PRAAT$F1_Hz[c+1]),
NA,
Formants_PRAAT$F1_Hz[c])
Formants_PRAAT$F2_Hz[c] <- ifelse(is.na(Formants_PRAAT$F2_Hz[c-1]) &&
is.na(Formants_PRAAT$F2_Hz[c+1]),
NA,
Formants_PRAAT$F2_Hz[c])
c <- c + 1
}
rm(c)
Hull_b <- cHull(Formants_PRAAT$F1_b, Formants_PRAAT$F2_b)
### Plotting Hull
convexCoords <- Formants_PRAAT %>%
dplyr::select(F1_b, F2_b) %>%
as.matrix() %>%
grDevices::chull()
convex <- Formants_PRAAT %>%
slice(convexCoords)
hullPlot_3 <- ggplot(aes(F2_b, F1_b),
data = Formants_PRAAT) +
geom_point(shape = 21) +
geom_polygon(data = convex,
alpha = .5,
color = "#1279B5",
fill = NA,
size = 1.5) +
annotate("text", x = text_x, y = text_y, label = paste("Hull =",round(Hull_b,2))) +
scale_y_reverse() +
scale_x_reverse() +
xlim(xlims) +
ylim(ylims) +
theme_classic() + labs(title = paste("3 SD")) + xlab("F2 (Bark)") + ylab("F1 (Bark)") +
theme(plot.title = element_text(hjust = 0.5),
aspect.ratio = 1)
Scale for 'x' is already present. Adding another scale for 'x', which will replace the
existing scale.
Scale for 'y' is already present. Adding another scale for 'y', which will replace the
existing scale.
hullPlot_3
# Hull - 1.5 SD ----
Pitch_PRAAT <- list.files(path = paste("Prepped Data/Example Data", sep = ""),
pattern = ".Pitch", ignore.case = T) %>%
paste("Prepped Data/Example Data/",., sep = "") %>%
read.delim(., header = F) %>%
dplyr::rename(Pitch = V1) %>%
dplyr::mutate(Pitch = gsub("--undefined--",NA,Pitch),
Pitch = as.numeric(Pitch))
Formants_PRAAT <- list.files(path = paste("Prepped Data/Example Data", sep = ""),
pattern = "_Formant", ignore.case = T) %>%
paste("Prepped Data/Example Data/",., sep = "") %>%
read.delim(., header = T) %>%
dplyr::select(!c(nformants, B1.Hz., B2.Hz., B3.Hz., F4.Hz., B4.Hz., F5.Hz., B5.Hz.)) %>%
dplyr::rename(Time_s = time.s.,
F1_Hz = F1.Hz.,
F2_Hz = F2.Hz.,
F3_Hz = F3.Hz.) %>%
dplyr::mutate(F1_Hz = ifelse(F1_Hz == 0, NA, F1_Hz),
F2_Hz = ifelse(F2_Hz == 0, NA, F2_Hz),
F3_Hz = ifelse(F3_Hz == 0, NA, F3_Hz)) %>%
dplyr::mutate(F1_Hz = as.numeric(F1_Hz),
F2_Hz = as.numeric(F2_Hz),
F3_Hz = suppressWarnings(as.numeric(F3_Hz)),
Time_ms = Time_s / 1000,
F1_kHz = F1_Hz / 1000,
F2_kHz = F2_Hz / 1000,
F3_kHz = F3_Hz / 1000,
F1_b = emuR::bark(F1_Hz),
F2_b = emuR::bark(F2_Hz),
F3_b = emuR::bark(F3_Hz)) %>%
dplyr::select(!Time_s) %>%
dplyr::relocate(Time_ms, .before = F1_Hz) %>%
cbind(.,Pitch_PRAAT) %>%
dplyr::filter(!is.na(Pitch)) %>%
dplyr::mutate(F1_mad = (abs(F1_Hz - median(F1_Hz))/ mad(F1_Hz, constant = 1.4826)) > 2.5,
F2_mad = (abs(F2_Hz - median(F2_Hz))/ mad(F2_Hz, constant = 1.4826)) > 2.5) %>%
dplyr::filter(F1_mad == FALSE & F2_mad == FALSE) %>%
dplyr::mutate(mDist = mahalanobis(cbind(.$F1_Hz, .$F2_Hz),
colMeans(cbind(.$F1_Hz, .$F2_Hz)),
cov = cov(cbind(.$F1_Hz, .$F2_Hz))),
mDist_sd = abs(scale(mDist,center = T))) %>%
#dplyr::mutate(mDistOutlier = (stats::pchisq(mDist, df=1, lower.tail=FALSE)) < .001) %>%
dplyr::filter(mDist_sd < 1.5)
c <- 2
while(c < NROW(Formants_PRAAT)){
Formants_PRAAT$F1_Hz[c] <- ifelse(is.na(Formants_PRAAT$F1_Hz[c-1]) &&
is.na(Formants_PRAAT$F1_Hz[c+1]),
NA,
Formants_PRAAT$F1_Hz[c])
Formants_PRAAT$F2_Hz[c] <- ifelse(is.na(Formants_PRAAT$F2_Hz[c-1]) &&
is.na(Formants_PRAAT$F2_Hz[c+1]),
NA,
Formants_PRAAT$F2_Hz[c])
c <- c + 1
}
rm(c)
Hull_b <- cHull(Formants_PRAAT$F1_b, Formants_PRAAT$F2_b)
### Plotting Hull
convexCoords <- Formants_PRAAT %>%
dplyr::select(F1_b, F2_b) %>%
as.matrix() %>%
grDevices::chull()
convex <- Formants_PRAAT %>%
slice(convexCoords)
hullPlot_1.5 <- ggplot(aes(F2_b, F1_b),
data = Formants_PRAAT) +
geom_point(shape = 21) +
geom_polygon(data = convex,
alpha = .5,
color = "#1279B5",
fill = NA,
size = 1.5) +
annotate("text", x = text_x, y = text_y, label = paste("Hull =",round(Hull_b,2))) +
scale_y_reverse() +
scale_x_reverse() +
xlim(xlims) +
ylim(ylims) +
theme_classic() + labs(title = paste("1.5 SD")) + xlab("F2 (Bark)") + ylab("F1 (Bark)") +
theme(plot.title = element_text(hjust = 0.5),
aspect.ratio = 1)
Scale for 'x' is already present. Adding another scale for 'x', which will replace the
existing scale.
Scale for 'y' is already present. Adding another scale for 'y', which will replace the
existing scale.
hullPlot_1.5
# Combined ----
ggpubr::ggarrange(hullPlot_1.5, hullPlot_2, hullPlot_2.5, hullPlot_3,
ncol = 4)
ggsave(filename = "Plots/Hull at Different Filters.png",
height = 3,
width= 9,
units = "in",
scale = 1)
NA
ListenerDemo <- Listeners %>%
furniture::table1(age, gender, race, ethnicity)
ListenerDemo
SpeakerDemo <- AcousticData %>%
dplyr::select(c(Speaker, Sex, Etiology))
Ages <- rio::import("Prepped Data/Speaker Ages.xlsx")
SpeakerDemo <- full_join(SpeakerDemo, Ages, by = "Speaker")
SpeakerDemoInfo <- SpeakerDemo %>%
furniture::table1(Sex, Etiology, Age, na.rm = F)
SpeakerDemoInfo
SpeakerDemo %>%
dplyr::summarize(mean_age = mean(Age, na.rm = T), age_sd = sd(Age, na.rm = T), age_range = range(Age, na.rm = T))